# Plot, for a single benchmark, the mean relative error of the best model
# against the number of profiles used to build it.
benchmark = "cg"
fix = "Class"
# BT and SP use a dedicated process-count list; both lists are defined
# elsewhere in this file.
if benchmark in ("bt", "sp"):
    processes = processes_onlyBTSP
else:
    processes = processes_excludeBTSP
benchmarkClasses = ["A", "B", "C", "D"]
targetNumOfProcess = 256
fixedBenchmarkClass = "B"
fixedProcess = 64

# Fetch the raw experiment data that matches the conditions above
DF = returnRawDFperBenchmark(
    Benchmark=benchmark,
    fix=fix,
    benchmarkClass=benchmarkClasses,
    FixedProcess=fixedProcess,
    Processes=processes,
    FixedBenchmarkClass=fixedBenchmarkClass,
)

# Drop every row of the raw data that contains a NaN
noNaNDF = DF.dropna(how="any")
# noNaNDF

returnedCalculatedDF = return_calculatedDF(
    benchmark=benchmark,
    noNaNDF=noNaNDF,
    targetNumOfProcess=targetNumOfProcess,
    targetProblemSize=fixedBenchmarkClass,
    fix=fix,
)
# returnedCalculatedDF

# Sorted list of the distinct values found in the numOfData column
listOfNumOfData = sorted(set(returnedCalculatedDF["numOfData"].tolist()))
# listOfNumOfData

# Dictionary: number of profiles used -> mean relative error of the best model
dictAverageRelativeErrorOfBestModel = {}
for numOfData in listOfNumOfData:
    # Rows that were computed with this number of profiles
    extractedPerNumOfProfileDF = returnedCalculatedDF[
        returnedCalculatedDF["numOfData"] == numOfData
    ]
    # Average only the column that is needed: a whole-frame mean() raises on
    # non-numeric columns in pandas >= 2.0.
    data = extractedPerNumOfProfileDF["relativeErrorOfBestModel"].mean()
    dictAverageRelativeErrorOfBestModel[numOfData] = data

x = list(dictAverageRelativeErrorOfBestModel.keys())
x
y = [dictAverageRelativeErrorOfBestModel[key] for key in x]
y

plt.figure()
plt.plot(x, y, marker="o")
plt.xlabel("使用したプロファイル数[%]")
plt.ylabel("平均絶対誤差率")
# Per-benchmark plot data: dictTmp[benchmark]["x"] / ["y"] are read below.
# `benchmarks` is defined elsewhere in this file.
dictTmp = returnDictForPlotPerNumOfUsedData(
    Benchmark=benchmarks,
    fix="Class",
    benchmarkClass=["A", "B", "C", "D"],
    FixedProcess=64,
    Processes=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    FixedBenchmarkClass="C",
)

# Show floats with four significant digits
pd.options.display.float_format = "{:.4g}".format

# Process counts used for training and the one to predict (same for every
# benchmark, so defined once outside the loop)
listToLearn = [1, 2, 4, 8, 16, 32, 64, 128]
listToPredict = [256]

# Collect one row per benchmark and build the frame once: DataFrame.append
# was removed in pandas 2.0.
rowsForTable = []
for benchmark in benchmarks:
    benchmark_x = dictTmp[benchmark]["x"]
    benchmark_y = dictTmp[benchmark]["y"]
    # Pick the y value whose x equals the number of training points
    index = benchmark_x.index(len(listToLearn))
    MAPE = benchmark_y[index]
    relativeCost = returnRelativeCost(
        benchmark=benchmark,
        variablesToLearn=listToLearn,
        variablesToPredict=listToPredict,
        fixedClassOrProcess="Class",
        fixed="C",
    )
    dictRowData = {
        "ベンチマーク名": benchmark.upper(),
        "平均絶対誤差率[%]": MAPE,
        "相対コスト[%]": relativeCost,
    }
    rowsForTable.append(dictRowData)

tmpDF = pd.DataFrame(
    rowsForTable, columns=["ベンチマーク名", "平均絶対誤差率[%]", "相対コスト[%]"]
)
# Column means; the benchmark-name column is text and is therefore excluded
tmpDFMean = tmpDF.mean(numeric_only=True)
print(tmpDF.to_latex(index=False))

# dictTmp
# One line per benchmark: error versus number of profiles used
plt.figure(figsize=(5.72, 4), dpi=200)
for benchmark in dictTmp:
    x = dictTmp[benchmark]["x"]
    y = dictTmp[benchmark]["y"]
    plt.plot(x, y, marker="o", label=benchmark.upper())
plt.legend()
plt.xlabel("使用したプロファイル数")
plt.ylabel("平均絶対誤差率[%]")
# Small lists for experimentation
benchmarkNames = ["cg"]
classes = ["B"]
processes = [1, 2, 4, 8, 16, 32, 64, 128, 256]

# Fetch the raw data
cgDF = returnCollectedExistingData(
    benchmarkNames=["cg"],
    classes=["A", "B", "C", "D"],
    processes=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    csvDirPath="./csv_files/",
)
cgDF

# Keep only the rows whose benchmark class is A
cgDFfixedA = cgDF[cgDF["benchmarkClass"] == "A"]
cgDFfixedA

# Sorted list of the distinct function names
functionNames = sorted(set(cgDFfixedA["functionName"]))
print(functionNames)

# Rows belonging to the function named "CG"
functionNameCG = cgDFfixedA[cgDFfixedA["functionName"] == "CG"]
functionNameCG

# Explanatory and response variables as plain lists
# Number of processes
raw_x = functionNameCG.loc[:, "process"].tolist()
# Number of function calls
raw_y = functionNameCG.loc[:, "functionCallNum"].tolist()
print(f"raw_x={raw_x}")
print(f"raw_y={raw_y}")

bencmarkName = "CG"
functionName = "CG"
fixProcessOrClass = "Class"
fixed = "A"
targetProcess = 256
targetBenchmarkClass = fixed
# Last observed call count is taken as the prediction target
targetFunctionCallNum = raw_y[-1]

# NOTE(review): this call passes placeholder literals ("benhmarkName",
# "functionName", [1, 2, 3], ...) rather than the variables prepared above -
# confirm whether that is intentional.
returnSeriesOfData(
    benchmarkName="benhmarkName",
    functionName="functionName",
    rawX=[1, 2, 3],
    rawY=[1, 2, 3],
    fixProcessOrClass="Class",
    fixed="B",
    targetProcess=256,
    targetBenchmarkClass="B",
    targetFunctionCallNum=-1,
    csvDirPath="./csv_files/",
)
# Display the collected rows as a table; both the rows and the column names
# are built elsewhere in this file.
pd.DataFrame(data=listForRelativeErrorTable, columns=columnsNames)
# 緊急 プロジェクト別ゼミで、分岐点検出アルゴリズムの更新にあたって、旧アルゴリズムと新アルゴリズムの比較を行う必要が出てきた。 相対誤差率・適合度の値がデータとなっている二つの表を作る。 旧アルゴリズムと新アルゴリズムとを別々のモデルとして検証する。
# Summarise every benchmark except BT/SP and compare the old and the new
# branch model on one function.
benchmarkNamesExcludeBTSP = ["cg", "ep", "ft", "is", "lu", "mg"]
# classes = ["A", "B", "C", "D"]
classes = ["B"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
targetIndex = -1
csvDirPath = "./csv_files/"
modelNames = ["ModelLin", "ModelIp", "ModelLog", "ModelBranch2"]
dfByDatumExcludeBTSP = returnDFSummarizedData(
    benchmarkNames=benchmarkNamesExcludeBTSP,
    classes=classes,
    processes=processes,
    targetIndex=targetIndex,
    csvDirPath=csvDirPath,
    modelNames=modelNames,
)

# Split the summary per benchmark and count the total number of rows
dictForLatexTable = {}
numOfData = 0
for benchmarkName in benchmarkNamesExcludeBTSP:
    perBenchmarkDF = dfByDatumExcludeBTSP[
        dfByDatumExcludeBTSP["benchmarkName"] == benchmarkName
    ]
    dictForLatexTable[benchmarkName] = perBenchmarkDF
    numOfData += len(perBenchmarkDF)
numOfData

# One summary row per benchmark, printed as a LaTeX table
listForDF = []
for benchmarkName in benchmarkNamesExcludeBTSP:
    listForDF.append(
        returnSeriesOfDatumPerBenchmark(
            inputDF=dictForLatexTable[benchmarkName],
            modelNames=["ModelIp", "ModelLog", "ModelBranch2", "ModelLin"],
        )
    )
DF = pd.DataFrame(listForDF)
print(DF.to_latex(index=False))

columnNamesForCheck = ["functionName", "usedDataX", "usedDataY"]
# NOTE(review): listForDF2 and dictForLatexTable2 are assigned further down in
# this file, so these inspection expressions only work once that code has run.
# benchmarkName still holds the last value from the loop above.
listForDF2
dictForLatexTable[benchmarkName][columnNamesForCheck + ["MAPEOfBranchModel"]]
dictForLatexTable2[benchmarkName][columnNamesForCheck + ["MAPEOfBranchModel2"]]

# Experiment with CONJ_GRAD(), one of the data sets for which the
# linear-saturation model is the best fit
dfCG = dictForLatexTable["cg"]
CONJ_GRAD_DF = dfCG[dfCG["functionName"] == "CONJ_GRAD"]
x = list(CONJ_GRAD_DF["usedDataX"])[0]
y = list(CONJ_GRAD_DF["usedDataY"])[0]
CONJ_GRAD = {"x": x, "y": y}

plt.figure()
# Plot the raw data
plt.scatter(x, y)
# Predict with the branch model and plot.
# Fixed: the model used to be trained with trainY=x, i.e. on the explanatory
# variable instead of the observed values.
modelBranch = ModelBranch(trainX=x, trainY=y)
modelBranch.calcLr()
predictedY = modelBranch.predict(x)
plt.plot(x, predictedY)
# Predict with branch model 2 and plot
modelBranch2 = ModelBranch2(trainX=x, trainY=y)
modelBranch2.calcLr()
predictedY = modelBranch2.predict(x)
plt.plot(x, predictedY)

modelBranch2.branchIndex
# Material for the project seminar
# Import the ipynb-format library
# %run ./lib/lib.ipynb


def _plotBranchModelFits(dataX, dataY):
    """Scatter-plot the data and overlay the ModelBranch and ModelBranch2 fits.

    Opens a new figure, trains each model on (dataX, dataY), and plots its
    prediction for dataX.

    Returns:
        (modelBranch, modelBranch2, predictedY), where predictedY is the
        prediction of ModelBranch2 (the model fitted last).
    """
    plt.figure()
    plt.scatter(dataX, dataY)
    fittedModels = []
    predicted = None
    for modelClass in (ModelBranch, ModelBranch2):
        model = modelClass(trainX=dataX, trainY=dataY)
        model.calcLr()
        predicted = model.predict(dataX)
        plt.plot(dataX, predicted)
        fittedModels.append(model)
    return fittedModels[0], fittedModels[1], predicted


# Data 1: linear up to the branch point, constant afterwards
branchIndex = 11
numOfPoints = 21
dataBase1 = list(range(branchIndex))
# The plateau repeats the value of the line at x = branchIndex
dataBase2 = [branchIndex] * (numOfPoints - branchIndex)
dataList1 = [3 * x + 4 for x in dataBase1]
dataList2 = [3 * x + 4 for x in dataBase2]
# From here on dataBase2 holds the x coordinates of the plateau
dataBase2 = list(range(branchIndex, numOfPoints))
dataListY = dataList1 + dataList2
dataListX = dataBase1 + dataBase2
modelBranch, modelBranch2, predictedY = _plotBranchModelFits(dataListX, dataListY)

# Data 2: switches to a different linear model at the branch point
branchIndex = 10
dataBase1 = list(range(branchIndex))
dataBase2 = list(range(branchIndex, 20))
dataList1 = [x + 1 for x in dataBase1]
dataList2 = [0.5 * x + 6 for x in dataBase2]
dataListY = dataList1 + dataList2
dataListX = dataBase1 + dataBase2
modelBranch, modelBranch2, predictedY = _plotBranchModelFits(dataListX, dataListY)

# Data 3: no branch point
branchIndex = 22
dataBase1 = list(range(branchIndex))
dataListY = [5 * x + 6 for x in dataBase1]
dataListX = dataBase1
modelBranch, modelBranch2, predictedY = _plotBranchModelFits(dataListX, dataListY)
# Compare the old branch model (ModelBranch) with the new one (ModelBranch2):
# first the per-benchmark summary tables, then the mean relative error tables.
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
# classes = ["A", "B", "C", "D"]
classes = ["B"]
processes = [2, 4, 8, 16, 32, 64, 128, 256]
targetIndex = -1
csvDirPath = "./csv_files/"
modelNames = ["ModelBranch", "ModelIp", "ModelLog", "ModelLin"]
modelNames2 = ["ModelBranch2", "ModelIp", "ModelLog", "ModelLin"]

dfByDatumCG = returnDFSummarizedData(
    benchmarkNames=benchmarkNames,
    classes=classes,
    processes=processes,
    targetIndex=targetIndex,
    csvDirPath=csvDirPath,
    modelNames=modelNames,
)
dfByDatumCG2 = returnDFSummarizedData(
    benchmarkNames=benchmarkNames,
    classes=classes,
    processes=processes,
    targetIndex=targetIndex,
    csvDirPath=csvDirPath,
    modelNames=modelNames2,
)

# Split both summaries per benchmark and count their rows
dictForLatexTable = {}
dictForLatexTable2 = {}
numOfData = 0
numOfData2 = 0
for benchmarkName in benchmarkNames:
    perBenchmarkDF = dfByDatumCG[dfByDatumCG["benchmarkName"] == benchmarkName]
    dictForLatexTable[benchmarkName] = perBenchmarkDF
    numOfData += len(perBenchmarkDF)
    perBenchmarkDF2 = dfByDatumCG2[dfByDatumCG2["benchmarkName"] == benchmarkName]
    dictForLatexTable2[benchmarkName] = perBenchmarkDF2
    numOfData2 += len(perBenchmarkDF2)

# Table for the old model set
listForDF = []
for benchmarkName in benchmarkNames:
    listForDF.append(
        returnSeriesOfDatumPerBenchmark(
            inputDF=dictForLatexTable[benchmarkName], modelNames=modelNames
        )
    )
DF = pd.DataFrame(listForDF)
DF = DF.sort_values("benchmarkName")
columnNameList = ["ベンチマーク名(関数の個数)", "線形飽和モデル", "反比例モデル", "対数モデル", "線形モデル"]
DF.columns = columnNameList
print(DF.to_latex(index=False))

# Table for the new model set
listForDF2 = []
for benchmarkName in benchmarkNames:
    listForDF2.append(
        returnSeriesOfDatumPerBenchmark(
            inputDF=dictForLatexTable2[benchmarkName], modelNames=modelNames2
        )
    )
DF = pd.DataFrame(listForDF2)
DF = DF.sort_values("benchmarkName")
columnNameList = ["ベンチマーク名(関数の個数)", "線形飽和モデル", "反比例モデル", "対数モデル", "線形モデル"]
DF.columns = columnNameList
print(DF.to_latex(index=False))

# Mean relative error per benchmark, old model set
forInputDF = returnDFSummarizedData(
    benchmarkNames=["cg", "ep", "ft", "is", "lu", "mg"],
    classes=["B"],
    processes=[2, 4, 8, 16, 32, 64, 128, 256],
    targetIndex=-1,
    csvDirPath="./csv_files/",
    modelNames=["ModelBranch", "ModelIp", "ModelLog", "ModelLin"],
)
benchmarkNames = sorted(set(forInputDF["benchmarkName"].tolist()))
columnsNames = ["ベンチマーク名(関数の個数)", "MAPE(予測対象関数コール回数に対する)"]
listForRelativeErrorTable = []
for benchmarkName in benchmarkNames:
    forInputDFPerBenchmark = forInputDF[forInputDF["benchmarkName"] == benchmarkName]
    # Benchmark name followed by its number of rows
    column1 = f"{benchmarkName.upper()}({len(forInputDFPerBenchmark)})"
    # Average only the needed column: a whole-frame mean() raises on
    # non-numeric columns in pandas >= 2.0.
    seriesOfMeanRelativeErrorRate = forInputDFPerBenchmark["RelativeErrorRate"].mean()
    # Truncate (not round) to two decimal places
    column2 = int(seriesOfMeanRelativeErrorRate * 100) / 100
    listForRelativeErrorTable.append([column1, column2])
DFtoLatex = pd.DataFrame(listForRelativeErrorTable, columns=columnsNames)
print(DFtoLatex.to_latex(index=False))

# Mean relative error per benchmark, new model set
forInputDF = returnDFSummarizedData(
    benchmarkNames=["cg", "ep", "ft", "is", "lu", "mg"],
    classes=["B"],
    processes=[2, 4, 8, 16, 32, 64, 128, 256],
    targetIndex=-1,
    csvDirPath="./csv_files/",
    modelNames=["ModelBranch2", "ModelIp", "ModelLog", "ModelLin"],
)
benchmarkNames = sorted(set(forInputDF["benchmarkName"].tolist()))
print(f"benchmarkNames={benchmarkNames}")
columnsNames = ["ベンチマーク名(関数の個数)", "MAPE(予測対象関数コール回数に対する)"]
listForRelativeErrorTable = []
for benchmarkName in benchmarkNames:
    forInputDFPerBenchmark = forInputDF[forInputDF["benchmarkName"] == benchmarkName]
    column1 = f"{benchmarkName.upper()}({len(forInputDFPerBenchmark)})"
    seriesOfMeanRelativeErrorRate = forInputDFPerBenchmark["RelativeErrorRate"].mean()
    column2 = int(seriesOfMeanRelativeErrorRate * 100) / 100
    listForRelativeErrorTable.append([column1, column2])
DFtoLatex = pd.DataFrame(listForRelativeErrorTable, columns=columnsNames)
print(DFtoLatex.to_latex(index=False))

DFtoLatex
# Run the prediction for one function at a time. Variables: core count and
# the initialization variables derived from each benchmark.
benchmarkNames = ["cg"]
benchmarkName = "cg"
classes = ["S", "W", "A", "B", "C", "D", "E", "F"]
targetClass = "F"
processes = [1, 2, 4, 8, 16, 32, 64, 128, 256]
targetProcess = 256

# Fetch the data
rawDF = returnCollectedExistingData(
    benchmarkNames=benchmarkNames,
    classes=classes,
    processes=processes,
    csvDirPath="./csv_files/",
)
rawDF = addInitDataToRawDF(rawDF)

# Rows are collected in plain lists and turned into DataFrames after the loop:
# DataFrame.append was removed in pandas 2.0.
resultColumns = ["functionName", "modelLin", "modelIp", "modelLog"]
# Results with the initialization variables
rowsMulti = []
# Results without the initialization variables
rowsSingle = []

# Fit the models function by function
functionNames = sorted(set(rawDF["functionName"]))
for functionName in functionNames:
    # print(f"----------functionName={functionName}----------")
    # Extract the rows of rawDF that belong to this function
    DFExtractedByFunction = rawDF[rawDF["functionName"] == functionName].copy()

    # Add a numeric version of the problem-size column so it can be used as
    # an explanatory variable
    strListProblemSize = DFExtractedByFunction["benchmarkClass"].tolist()
    intListProblemSize = convertBenchmarkClasses_problemSizeInNPB(
        inputList=strListProblemSize
    )
    DFExtractedByFunction["intBenchmarkClass"] = intListProblemSize

    # Compute the MAPE of the three models
    # Response variable
    resVar = ["functionCallNum"]

    # Multiple regression (with the initialization variables)
    # Start from every column of the frame ...
    expVarMulti = DFExtractedByFunction.columns.tolist()
    # ... and remove the ones that are not explanatory variables
    for removeElement in [
        "functionName",
        "functionCallNum",
        "benchmarkName",
        "benchmarkClass",
    ]:
        expVarMulti.remove(removeElement)
    # Build the three models (linear, inverse-proportional, logarithmic) at once
    threeModelsByMulti = Models(
        inputDF=DFExtractedByFunction,
        expVarColNames=expVarMulti,
        resVarColNames=resVar,
        targetDF=None,
        modelNames=["modelLin", "modelIp", "modelLog"],
    )
    threeModelsByMulti.setUpDataBeforeCalcLr()
    threeModelsByMulti.calcLr()
    threeModelsByMulti.calcMAPE()
    dictCalcedMAPE = threeModelsByMulti.returnCalculatedMAPE()
    # Truncate each MAPE to one decimal place
    for key in dictCalcedMAPE:
        dictCalcedMAPE[key] = int(dictCalcedMAPE[key] * 10) / 10
    # print(f"multi = {dictCalcedMAPE}")
    # Store the result for this function
    rowsMulti.append(
        {
            "functionName": functionName,
            "modelLin": dictCalcedMAPE["modelLin"],
            "modelIp": dictCalcedMAPE["modelIp"],
            "modelLog": dictCalcedMAPE["modelLog"],
        }
    )

    # Single regression (without the initialization variables)
    expVarSingle = ["process", "intBenchmarkClass"]
    threeModelsBySingle = Models(
        inputDF=DFExtractedByFunction,
        expVarColNames=expVarSingle,
        resVarColNames=resVar,
        targetDF=None,
        modelNames=["modelLin", "modelIp", "modelLog"],
    )
    threeModelsBySingle.setUpDataBeforeCalcLr()
    threeModelsBySingle.calcLr()
    threeModelsBySingle.calcMAPE()
    dictCalcedMAPE = threeModelsBySingle.returnCalculatedMAPE()
    # Truncate each MAPE to one decimal place
    for key in dictCalcedMAPE:
        dictCalcedMAPE[key] = int(dictCalcedMAPE[key] * 10) / 10
    # print(f"single = {dictCalcedMAPE}")
    # Store the result for this function
    rowsSingle.append(
        {
            "functionName": functionName,
            "modelLin": dictCalcedMAPE["modelLin"],
            "modelIp": dictCalcedMAPE["modelIp"],
            "modelLog": dictCalcedMAPE["modelLog"],
        }
    )

# dtype=object presumably matches what the former append-based construction
# produced (rows appended to an empty frame) - TODO confirm.
resultDictMulti = pd.DataFrame(rowsMulti, columns=resultColumns, dtype=object)
resultDictSingle = pd.DataFrame(rowsSingle, columns=resultColumns, dtype=object)

# Output for LaTeX
# Rename the columns to Japanese
dictColumnNamesJP = {
    "functionName": "関数名",
    "modelLin": "線形モデル",
    "modelIp": "反比例モデル",
    "modelLog": "対数モデル",
}
resultDictMulti = resultDictMulti.rename(columns=dictColumnNamesJP)
resultDictSingle = resultDictSingle.rename(columns=dictColumnNamesJP)
# Use the function-name column as the index
resultDictMulti = resultDictMulti.set_index("関数名")
resultDictSingle = resultDictSingle.set_index("関数名")
# Print for checking
print(resultDictMulti.to_latex(caption="初期変数を含む"))
print(resultDictSingle.to_latex(caption="初期変数を含まない"))

# Build a table of the conditions (core count x problem size) of the data
# used to build the models
# Start from a table that only has row and column labels
columnNames = [2, 4, 8, 16, 32, 64, 128, 256]
indexNames = ["S", "W", "A", "B", "C", "D", "E", "F"]
usedTableDF = pd.DataFrame(columns=columnNames, index=indexNames)
# Function to inspect (CG)
functionName = "CG"
oneFunctionRawDF = rawDF[rawDF["functionName"] == functionName]
for columnName in columnNames:
    for indexName in indexNames:
        extracted = oneFunctionRawDF[
            (oneFunctionRawDF["process"] == columnName)
            & (oneFunctionRawDF["benchmarkClass"] == indexName)
        ]
        # 1 when exactly one row exists for this combination, otherwise 0
        usedTableDF.at[indexName, columnName] = 1 if len(extracted) == 1 else 0
print(usedTableDF.to_latex())
# Run the prediction for one function at a time, using only the data measured
# with 128 processes. Variables: core count and the initialization variables
# derived from each benchmark.
benchmarkNames = ["cg"]
benchmarkName = "cg"
classes = ["S", "W", "A", "B", "C", "D", "E", "F"]
targetClass = "F"
processes = [128]
targetProcess = 256

# Fetch the data
rawDF = returnCollectedExistingData(
    benchmarkNames=benchmarkNames,
    classes=classes,
    processes=processes,
    csvDirPath="./csv_files/",
)
rawDF = addInitDataToRawDF(rawDF)

# Rows are collected in plain lists and turned into DataFrames after the loop:
# DataFrame.append was removed in pandas 2.0.
resultColumns = ["functionName", "modelLin", "modelIp", "modelLog"]
# Results with the initialization variables
rowsMulti = []
# Results without the initialization variables
rowsSingle = []

# Fit the models function by function
functionNames = sorted(set(rawDF["functionName"]))
for functionName in functionNames:
    # print(f"----------functionName={functionName}----------")
    # Extract the rows of rawDF that belong to this function
    DFExtractedByFunction = rawDF[rawDF["functionName"] == functionName].copy()

    # Add a numeric version of the problem-size column so it can be used as
    # an explanatory variable
    strListProblemSize = DFExtractedByFunction["benchmarkClass"].tolist()
    intListProblemSize = convertBenchmarkClasses_problemSizeInNPB(
        inputList=strListProblemSize
    )
    DFExtractedByFunction["intBenchmarkClass"] = intListProblemSize

    # Compute the MAPE of the three models
    # Response variable
    resVar = ["functionCallNum"]

    # Multiple regression (with the initialization variables).
    # The two variable lists used to be named the other way round
    # (expVarSingle here, expVarMulti below); each name now matches the model
    # set it feeds. The values passed to Models are unchanged.
    # Start from every column of the frame ...
    expVarMulti = DFExtractedByFunction.columns.tolist()
    # ... and remove the ones that are not explanatory variables
    for removeElement in [
        "functionName",
        "functionCallNum",
        "benchmarkName",
        "benchmarkClass",
    ]:
        expVarMulti.remove(removeElement)
    # Build the three models (linear, inverse-proportional, logarithmic) at once
    threeModelsByMulti = Models(
        inputDF=DFExtractedByFunction,
        expVarColNames=expVarMulti,
        resVarColNames=resVar,
        targetDF=None,
        modelNames=["modelLin", "modelIp", "modelLog"],
    )
    threeModelsByMulti.setUpDataBeforeCalcLr()
    threeModelsByMulti.calcLr()
    threeModelsByMulti.calcMAPE()
    dictCalcedMAPE = threeModelsByMulti.returnCalculatedMAPE()
    # Truncate each MAPE to one decimal place
    for key in dictCalcedMAPE:
        dictCalcedMAPE[key] = int(dictCalcedMAPE[key] * 10) / 10
    # print(f"multi = {dictCalcedMAPE}")
    # Store the result for this function
    rowsMulti.append(
        {
            "functionName": functionName,
            "modelLin": dictCalcedMAPE["modelLin"],
            "modelIp": dictCalcedMAPE["modelIp"],
            "modelLog": dictCalcedMAPE["modelLog"],
        }
    )

    # Single regression (without the initialization variables)
    expVarSingle = ["process", "intBenchmarkClass"]
    threeModelsBySingle = Models(
        inputDF=DFExtractedByFunction,
        expVarColNames=expVarSingle,
        resVarColNames=resVar,
        targetDF=None,
        modelNames=["modelLin", "modelIp", "modelLog"],
    )
    threeModelsBySingle.setUpDataBeforeCalcLr()
    threeModelsBySingle.calcLr()
    threeModelsBySingle.calcMAPE()
    dictCalcedMAPE = threeModelsBySingle.returnCalculatedMAPE()
    # Truncate each MAPE to one decimal place
    for key in dictCalcedMAPE:
        dictCalcedMAPE[key] = int(dictCalcedMAPE[key] * 10) / 10
    # print(f"single = {dictCalcedMAPE}")
    # Store the result for this function
    rowsSingle.append(
        {
            "functionName": functionName,
            "modelLin": dictCalcedMAPE["modelLin"],
            "modelIp": dictCalcedMAPE["modelIp"],
            "modelLog": dictCalcedMAPE["modelLog"],
        }
    )

# dtype=object presumably matches what the former append-based construction
# produced (rows appended to an empty frame) - TODO confirm.
resultDictMulti = pd.DataFrame(rowsMulti, columns=resultColumns, dtype=object)
resultDictSingle = pd.DataFrame(rowsSingle, columns=resultColumns, dtype=object)

# Output for LaTeX
# Rename the columns to Japanese
dictColumnNamesJP = {
    "functionName": "関数名",
    "modelLin": "線形モデル",
    "modelIp": "反比例モデル",
    "modelLog": "対数モデル",
}
resultDictMulti = resultDictMulti.rename(columns=dictColumnNamesJP)
resultDictSingle = resultDictSingle.rename(columns=dictColumnNamesJP)
# Use the function-name column as the index
resultDictMulti = resultDictMulti.set_index("関数名")
resultDictSingle = resultDictSingle.set_index("関数名")
# Print for checking
print(
    resultDictMulti.to_latex(
        caption="初期変数を含めて作成したモデルのMAPE", label="tab:20211025includeInitVar"
    )
)
print(
    resultDictSingle.to_latex(
        caption="初期変数を含めずに作成したモデルのMAPE", label="tab:20211025excludeInitVar"
    )
)

# Build a table of the conditions (core count x problem size) of the data
# used to build the models
# Start from a table that only has row and column labels
columnNames = [2, 4, 8, 16, 32, 64, 128, 256]
indexNames = ["S", "W", "A", "B", "C", "D", "E", "F"]
usedTableDF = pd.DataFrame(columns=columnNames, index=indexNames)
# Function to inspect (CG)
functionName = "CG"
oneFunctionRawDF = rawDF[rawDF["functionName"] == functionName]
for columnName in columnNames:
    for indexName in indexNames:
        extracted = oneFunctionRawDF[
            (oneFunctionRawDF["process"] == columnName)
            & (oneFunctionRawDF["benchmarkClass"] == indexName)
        ]
        # 1 when exactly one row exists for this combination, otherwise 0
        usedTableDF.at[indexName, columnName] = 1 if len(extracted) == 1 else 0
print(usedTableDF.to_latex())
# Add a "最低値" column holding, for every row, the smallest value found in
# the columns that existed before it was added. inputDF is the same object as
# dfInit, so dfInit gains the column as well.
inputDF = dfInit
funcNames = inputDF.index.to_list()
modelNames = inputDF.columns.to_list()
# Start from +inf so that any real value replaces it
inputDF["最低値"] = math.inf
for funcName in funcNames:
    seriesInFunc = inputDF.loc[funcName]
    # Only values strictly below +inf can become the minimum; when none
    # qualifies the row keeps +inf.
    lowestInFunc = min(
        (
            seriesInFunc[modelName]
            for modelName in modelNames
            if seriesInFunc[modelName] < math.inf
        ),
        default=math.inf,
    )
    inputDF.at[funcName, "最低値"] = lowestInFunc
# Average the per-function minimum ("最低値") for every frame in inputDict and
# collect the averages in one table indexed by the dictionary keys.
inputDict = {"benchmarkName": inputDF}
benchmarkNames = []
lowestMAPEAverage = []
for benchmarkName, perBenchmarkDF in inputDict.items():
    benchmarkNames.append(benchmarkName)
    # Average only the needed column: a whole-frame mean() raises on
    # non-numeric columns in pandas >= 2.0.
    lowestMAPEAverage.append(perBenchmarkDF["最低値"].mean())
resultDF = pd.DataFrame(
    index=benchmarkNames, data=lowestMAPEAverage, columns=["平均絶対誤差"]
)

# Inspection expressions (these were fused into the single undefined name
# resultDFinputDFdfBase). dfBase is defined elsewhere in this file.
resultDF
inputDF
dfBase